##########################
##########################
###Mediation: High Risk###
##########################
##########################
# Set RNG
set.seed(123)

# subset na rm data
med.dat <- na.omit(full.2003.2018[,c("gid","clear_zoon_confirm","lagclear_zoon_confirm","Forest.coverage_bin","ag_crops","fires.cell.count_anom","NDVI_mean_anom","log.nl","logged.pop","month")])
#Create regular dummies for the mediate package
unique_month <- unique(med.dat$month)
#run loop 
for (month in unique_month) {
  new_col_name <- paste0("f_month", month)  # Create a column name
  med.dat[[new_col_name]] <- ifelse(med.dat$month == month, 1, 0)  # Assign 1 if matched, else 0
}

#Remove the largest (cities) and smallest population areas
#Extract population size
med.dat$pop <- exp(med.dat$logged.pop-1)
#subet data
med.dat <- subset(med.dat, (pop>=1000 & pop <50000))


#############
##Ag. areas##
#############
# subset only ag areas
med.dat.ag <- subset(med.dat, (ag_crops>0))

#Run base model
med.fit1 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl +logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit1)
#CSEs
med.cse1 <- coeftest(med.fit1, vcov. = vcovCL(med.fit1, cluster = med.dat.ag$gid, type = "HC0"))

#Run fit model
out.fit1 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit1)
#CSEs
out.cse1 <- coeftest(out.fit1, vcov. = vcovCL(out.fit1, cluster = med.dat.ag$gid, type = "HC0"))

#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results_hs <- list()
for (i in 1:10) {  # Split 1000 sims into 10 runs of 100 each
  sim_results_hs[[i]] <- mediate(med.fit1, out.fit1, 
                                 treat = "fires.cell.count_anom", 
                                 mediator = "NDVI_mean_anom",
                                 sims = 100, cluster = med.dat.ag$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_ag <- med_pvals(sim_results_hs)

################
##Forest areas##
################
#Subset only forest data
med.dat.for <- subset(med.dat, (Forest.coverage_bin>0))

#Run base model
med.fit2 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit2)
#CSEs
med.cse2 <- coeftest(med.fit2, vcov. = vcovCL(med.fit2, cluster = med.dat.for$gid, type = "HC0"))

#Run fit model
out.fit2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit2)
#CSEs
out.cse2 <- coeftest(out.fit2, vcov. = vcovCL(out.fit2, cluster = med.dat.for$gid, type = "HC0"))

#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results_hs2 <- list()
for (i in 1:10) {  # Split 1000 sims into 10 runs of 100 each
  sim_results_hs2[[i]] <- mediate(med.fit2, out.fit2, 
                                  treat = "fires.cell.count_anom", 
                                  mediator = "NDVI_mean_anom",
                                  sims = 100, cluster = med.dat.for$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_for <- med_pvals(sim_results_hs2)

######################
### All other areas###
######################
#Subset all other areas 
med.dat.other <- subset(med.dat, (Forest.coverage_bin==0|ag_crops==0))

#Run base model
med.fit3 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+log.nl + logged.pop+
                 lagclear_zoon_confirm+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.other)
summary(med.fit3)
#CSEs
med.cse3 <- coeftest(med.fit3, vcov. = vcovCL(med.fit3, cluster = med.dat.other$gid, type = "HC0"))

#Run fit model
out.fit3 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.other)
summary(out.fit3)
#CSEs
out.cse3 <- coeftest(out.fit3, vcov. = vcovCL(out.fit3, cluster = med.dat.other$gid, type = "HC0"))


#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results_hs3 <- list()
for (i in 1:20) {  # Split 1000 sims into 20 runs of 50 each
  sim_results_hs3[[i]] <- mediate(med.fit3, out.fit3, 
                                  treat = "fires.cell.count_anom", 
                                  mediator = "NDVI_mean_anom",
                                  sims = 50, cluster = med.dat.other$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_oth <- med_pvals(sim_results_hs3)

#Combine all average p-values
pvals.hs <- cbind(pvals_ag, pvals_for, pvals_oth)
pvals.hs

##Combine all estimates together
sim_results_list <- list(sim_results_hs, sim_results_hs2, sim_results_hs3)

#Generate plots -- model name "main"
med_plots(model.name="HS", sim_results_list=sim_results_list)

##Export mediation models
# Open a file connection
sink("tableHS.txt")

# --- First: Export full regression tables (to LaTeX or text) ---
stargazer(med.cse1, out.cse1, med.cse2, out.cse2, med.cse3, out.cse3,
          type = "text", 
          star.cutoffs = c(0.05, 0.01), 
          style = "default")

cat("\n\n% --- Goodness-of-fit stats only ---\n\n")

# --- Second: Export only N, R², and Adjusted R² ---
stargazer(med.fit1, out.fit1, med.fit2, out.fit2, med.fit3, out.fit3,
          type = "text", 
          keep.stat = c("n", "rsq", "adj.rsq"),
          omit = ".*")

# Close the file
sink()
